The effect of using hyperparameter continuity

Hyperparameter continuity means carrying the surrogate's fitted hyperparameters over from one trial to the next and using them as the starting point for the next fit, rather than re-optimising from scratch every time. This notebook compares two otherwise identical optimisers, one with continuity disabled and one with it enabled.


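The sketch below (illustrative only, not part of the turbo API) shows what warm-starting looks like with a plain scikit-learn GP: the kernel returned by one fit, carrying its optimised hyperparameters, seeds the next fit. The data arrays are placeholders.

# Illustrative sketch only: warm-starting a scikit-learn GP fit from the
# hyperparameters found by a previous fit -- roughly the behaviour that
# param_continuity=True is expected to enable in turbo's surrogates.
import numpy as np
import sklearn.gaussian_process as gp

X1, y1 = np.random.rand(10, 1), np.random.rand(10)   # placeholder data
X2, y2 = np.random.rand(12, 1), np.random.rand(12)   # placeholder data

cold = gp.GaussianProcessRegressor(kernel=1.0 * gp.kernels.RBF(), alpha=1e-3)
cold.fit(X1, y1)

# cold.kernel_ holds the optimised length scale and signal variance, so the
# next fit starts its hyperparameter search from there instead of the defaults
warm = gp.GaussianProcessRegressor(kernel=cold.kernel_, alpha=1e-3)
warm.fit(X2, y2)
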
In [ ]:
%load_ext autoreload
%autoreload 2

In [ ]:
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
import seaborn as sns; sns.set() # prettify matplotlib

import numpy as np
import sklearn.gaussian_process as gp

In [ ]:
# local modules
import turbo as tb
import turbo.modules as tm
import turbo.plotting as tp
import turbo.gui.jupyter as tg

In [ ]:
surrogate = 'scikit' # 'GPy' or 'scikit'

Function to optimize, along with its true global minimum (taken over a dense grid) for reference:


In [ ]:
#f = lambda x: 1 * x * np.cos(x)
f = lambda x: 100 * np.sin(x**2/5) * np.cos(x*1.5) + 100
xmin, xmax = 0, 12
xs = np.linspace(xmin, xmax, num=200)

ys = f(xs)
best_y = np.min(ys)
best_x = xs[np.argmin(ys)]

In [ ]:
plt.figure(figsize=(12, 4))
plt.plot(xs, ys, 'g-', label='objective')
plt.plot(best_x, best_y, 'bo', label='optimum')
plt.legend(loc='upper left')
plt.margins(0.01, 0.1)
plt.xlabel(r'$x$')
plt.ylabel(r'$f(x)$')
plt.show()

In [ ]:
def create_optimiser(param_continuity):
    bounds = [ ('x', xmin, xmax) ]

    op = tb.Optimiser(f, 'min', bounds, pre_phase_trials=4)
    op.latent_space = tm.NoLatentSpace()
    op.pre_phase_select = tm.random_selector()
    op.aux_optimiser = tm.RandomAndQuasiNewton(num_random=100, grad_restarts=5)
    # param_continuity: whether to warm-start each surrogate fit from the
    # previous trial's hyperparameters (the behaviour compared in this notebook)
    if surrogate == 'GPy':
        op.surrogate = tm.GPySurrogate(training_iterations=5, param_continuity=param_continuity)
    elif surrogate == 'scikit':
        op.surrogate = tm.SciKitGPSurrogate(model_params=dict(
            alpha = 1e-3, # larger => more noise. Default = 1e-10
            kernel = 1.0 * gp.kernels.RBF(),
            normalize_y = True,
            copy_X_train = True # make a copy of the training data
        ), training_iterations=5, param_continuity=param_continuity)
    else:
        raise ValueError('unknown surrogate: {!r}'.format(surrogate))
    op.acquisition = tm.UCB(beta=3)
    return op

def compare(a, b, action, vertical):
    rows, cols = (2, 1) if vertical else (1, 2)
    size = (12, 5)  # same figure size works for both layouts
    fig, (a1, a2) = plt.subplots(rows, cols, figsize=size, sharex=True, sharey=True)
    action(a, (fig, a1))
    action(b, (fig, a2))
    a1.set_title(a1.get_title() + ' - without continuity')
    a2.set_title(a2.get_title() + ' - with continuity')
    fig.subplots_adjust(hspace=0.5)

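For reference, the UCB acquisition configured above trades off the surrogate's posterior mean against its posterior standard deviation, weighted by beta. A minimal sketch of the usual GP-UCB form for a minimisation problem follows; turbo's exact sign convention is not shown here.

# Illustrative only: the usual lower-confidence-bound form of GP-UCB for
# minimisation, with mu and sigma taken from the surrogate's posterior.
def lower_confidence_bound(mu, sigma, beta=3.0):
    return mu - beta * sigma  # smaller values are more promising when minimising
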
In [ ]:
a = create_optimiser(False)
ra = tb.Recorder(a)

b = create_optimiser(True)
rb = tb.Recorder(b)

In [ ]:
# seed identically so both optimisers see the same pre-phase samples
np.random.seed(100)
tg.OptimiserProgressBar(a)
a.run(max_trials=25)

np.random.seed(100)
tg.OptimiserProgressBar(b)
b.run(max_trials=25)

In [ ]:
compare(ra, rb, lambda rec, fig_ax:
    tp.plot_timings(rec, fig_ax=fig_ax), vertical=True)

In [ ]:
compare(ra, rb, lambda rec, fig_ax:
    tp.plot_error(rec, true_best=best_y, fig_ax=fig_ax), vertical=True)

In [ ]:
compare(ra, rb, lambda rec, fig_ax:
    tp.plot_surrogate_likelihood(rec, fig_ax=fig_ax), vertical=True)

In [ ]:
compare(ra, rb, lambda rec, fig_ax:
    tp.plot_surrogate_hyper_params_1D(rec, param_index=1, axes=('trial_num', 'param', 'likelihood'), fig_ax=fig_ax), vertical=False)

In [ ]:
compare(ra, rb, lambda rec, fig_ax:
    tp.plot_surrogate_hyper_params_1D(rec, param_index=1, axes=('param', 'likelihood', 'trial_num'), fig_ax=fig_ax), vertical=False)

In [ ]:
compare(ra, rb, lambda rec, fig_ax:
    tp.plot_surrogate_hyper_params_1D(rec, param_index=1, axes=('trial_num', 'likelihood', 'param'), fig_ax=fig_ax), vertical=False)

In [ ]:
compare(ra, rb, lambda rec, fig_ax:
    tp.plot_surrogate_hyper_params_2D(rec, fig_ax=fig_ax, log_scale=True), vertical=False)

In [ ]:
tp.interactive_plot_trial_1D(ra, param='x', trial_num=None, true_objective=f)

In [ ]:
tp.interactive_plot_trial_1D(rb, param='x', trial_num=None, true_objective=f)
